{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test AutoFileGroup\n",
    "This notebook tests the grouping functionality in AutoFileGroup class"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import yaml\n",
    "from loguru import logger\n",
    "\n",
    "from autocoder.agent.auto_filegroup import AutoFileGroup\n",
    "import byzerllm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create test data directory and files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Created 5 test files in .tmp/test_auto_filegroup\n"
     ]
    }
   ],
   "source": [
    "# Create test directory\n",
    "test_dir = \".tmp/test_auto_filegroup\"\n",
    "os.makedirs(test_dir, exist_ok=True)\n",
    "\n",
    "# Create test YAML files\n",
    "test_data = [\n",
    "    {\n",
    "        \"name\": \"001_feature1.yml\",\n",
    "        \"content\": {\n",
    "            \"query\": \"Add input validation to login function\",\n",
    "            \"urls\": [\n",
    "                \"/src/auth/login.py\",\n",
    "                \"/src/utils/validation.py\"\n",
    "            ]\n",
    "        }\n",
    "    },\n",
    "    {\n",
    "        \"name\": \"002_feature2.yml\",\n",
    "        \"content\": {\n",
    "            \"query\": \"Implement password reset functionality\",\n",
    "            \"urls\": [\n",
    "                \"/src/auth/password_reset.py\",\n",
    "                \"/src/auth/email.py\"\n",
    "            ]\n",
    "        }\n",
    "    },\n",
    "    {\n",
    "        \"name\": \"003_feature3.yml\", \n",
    "        \"content\": {\n",
    "            \"query\": \"Add caching to database queries\",\n",
    "            \"urls\": [\n",
    "                \"/src/db/cache.py\",\n",
    "                \"/src/db/queries.py\"\n",
    "            ]\n",
    "        }\n",
    "    },\n",
    "    {\n",
    "        \"name\": \"004_feature4.yml\",\n",
    "        \"content\": {\n",
    "            \"query\": \"Implement rate limiting for API endpoints\",\n",
    "            \"urls\": [\n",
    "                \"/src/api/rate_limiter.py\",\n",
    "                \"/src/api/middleware.py\"\n",
    "            ]\n",
    "        }\n",
    "    },\n",
    "    {\n",
    "        \"name\": \"005_feature5.yml\",\n",
    "        \"content\": {\n",
    "            \"query\": \"Add email verification after signup\",\n",
    "            \"urls\": [\n",
    "                \"/src/auth/email.py\",\n",
    "                \"/src/auth/signup.py\"\n",
    "            ]\n",
    "        }\n",
    "    }\n",
    "]\n",
    "\n",
    "# Write test files\n",
    "for item in test_data:\n",
    "    file_path = os.path.join(test_dir,\"actions\", item[\"name\"])\n",
    "    with open(file_path, \"w\", encoding=\"utf-8\") as f:\n",
    "        yaml.dump(item[\"content\"], f)\n",
    "\n",
    "print(f\"Created {len(test_data)} test files in {test_dir}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Initialize LLM and test grouping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2024-12-21 12:11:49.130\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mbyzerllm.utils.connect_ray\u001b[0m:\u001b[36mconnect_cluster\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mJDK 21 will be used (/Users/allwefantasy/.auto-coder/jdk-21.0.2.jdk/Contents/Home)...\u001b[0m\n",
      "2024-12-21 12:11:49,212\tINFO worker.py:1564 -- Connecting to existing Ray cluster at address: 127.0.0.1:6379...\n",
      "2024-12-21 12:11:49,244\tINFO worker.py:1740 -- Connected to Ray cluster. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "File Grouping Results:\n",
      "=====================\n",
      "\n",
      "Group 1: AutoFileGroup 功能优化与修复\n",
      "Queries:\n",
      "- 修复 @@AutoFileGroup(location: src/autocoder/agent/auto_filegroup.py) 中 action_files 排序逻辑错误\n",
      "- 优化下 @@group_by_similarity(location: src/autocoder/agent/auto_filegroup.py)\n",
      "- 在 notebooks/tests/test_auto_filegroup.ipynb 中, 修改导入路径和输出格式, 并添加执行结果输出. 在 src/autocoder/agent/auto_filegroup.py 中, 添加 FileGroup 和 FileGroups 模型, 并修改 group_by_similarity 方法返回类型为 FileGroups.\n",
      "- 在 notebooks/tests 里新建一个jupyter notebook ,测试 @src/autocoder/agent/auto_filegroup.py 中的分组功能\n",
      "- 修改 @@AutoFileGroup(location: src/autocoder/agent/auto_filegroup.py) 的初始化参数和 group_by_similarity 方法，增加 file_size_limit 限制和调整输入格式\n",
      "- 重构 @src/autocoder/agent/auto_filegroup.py 的代码,我们分组逻辑是,给定一组yaml文件,得到里面的query和urls, 一起给到一个分组 prompt 函数,该函数prompt 函数会输出如下格式 分组名: 对应的子query列表, 对应的 urls 列表,以 json格式输出\n",
      "- 在 src/autocoder/agent 目录下新增一个 auto_filegroup.py 文件, 使用prompt 函数实现一个自动分组功能. 参考 @src/autocoder/utils/__init__.py 文件中遍历 actions_dir 得到所有 yaml file, yaml file 内容典型如下:auto_merge: editblock\n",
      "Files:\n",
      "- /Users/allwefantasy/projects/auto-coder/src/autocoder/utils/__init__.py\n",
      "- /Users/allwefantasy/projects/auto-coder/src/autocoder/agent/auto_filegroup.py\n",
      "- .auto-coder/libs/llm_friendly_packages/github.com/allwefantasy/byzer-llm/README.md\n",
      "- /Users/allwefantasy/projects/auto-coder/notebooks/tests/test_auto_filegroup.ipynb\n",
      "\n",
      "Group 2: Git 操作与历史记录处理\n",
      "Queries:\n",
      "- 参考下面的代码 @self.app.get(\"/api/history/validate-and-load\", response_model=ValidationResponseWithFileNumbers)\n",
      "- 在  group_files 中,我们yaml文件名得到commit ,然后获取 diff 信息,最后也放到group_by_similarity 的prompt 里\n",
      "Files:\n",
      "- /Users/allwefantasy/projects/auto-coder/src/autocoder/utils/__init__.py\n",
      "- /Users/allwefantasy/projects/auto-coder/src/autocoder/agent/auto_filegroup.py\n",
      "- .auto-coder/libs/llm_friendly_packages/github.com/allwefantasy/byzer-llm/README.md\n",
      "\n",
      "Group 3: Claude 系统提示优化\n",
      "Queries:\n",
      "- 修复 notebooks/tests/test_extract_relevance_range.ipynb 中的执行错误，并更新代码以使用 TokenLimiter 类。\n",
      "- 在 @src/autocoder/common/code_auto_generate.py 等文件中，将 system_prompt 为 \"claude\" 时的提示内容改为 claude_sys_prompt。\n",
      "Files:\n",
      "- /Users/allwefantasy/projects/auto-coder/src/autocoder/common/code_auto_generate_editblock.py\n",
      "- /Users/allwefantasy/projects/auto-coder/src/autocoder/common/sys_prompt.py\n",
      "- .auto-coder/libs/llm_friendly_packages/github.com/allwefantasy/byzer-llm/README.md\n",
      "\n",
      "Group 4: 临时文件处理\n",
      "Queries:\n",
      "- 在 @src/autocoder/chat_auto_coder.py 中的 # Mark 处,我们需要从 yaml_config 临时生成一个 yaml file ,然后从该yaml 文件得到 args, 得到后要删除该临时文件\n",
      "Files:\n",
      "- /Users/allwefantasy/projects/auto-coder/src/autocoder/chat_auto_coder.py\n",
      "- .auto-coder/libs/llm_friendly_packages/github.com/allwefantasy/byzer-llm/README.md\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import sys\n",
    "import yaml\n",
    "from loguru import logger\n",
    "\n",
    "from autocoder.agent.auto_filegroup import AutoFileGroup\n",
    "import byzerllm\n",
    "\n",
    "llm = byzerllm.ByzerLLM.from_default_model(model=\"doubao_128k_chat\")\n",
    "\n",
    "# Create AutoFileGroup instance\n",
    "grouper = AutoFileGroup(\n",
    "    llm=llm, \n",
    "    project_dir=\"/Users/allwefantasy/projects/auto-coder\", \n",
    "    file_size_limit=100)\n",
    "\n",
    "# Test file grouping\n",
    "groups = grouper.group_files()\n",
    "\n",
    "print(\"\\nFile Grouping Results:\")\n",
    "print(\"=====================\")\n",
    "for idx, group in enumerate(groups, 1):\n",
    "    print(f\"\\nGroup {idx}: {group.name}\")\n",
    "    print(\"Queries:\")\n",
    "    for query in group.queries:\n",
    "        print(f\"- {query}\")\n",
    "    print(\"Files:\")\n",
    "    for url in group.urls:\n",
    "        print(f\"- {url}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cleanup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Cleaned up test directory: .tmp/test_auto_filegroup\n"
     ]
    }
   ],
   "source": [
    "# Remove test directory and files\n",
    "import shutil\n",
    "shutil.rmtree(test_dir)\n",
    "print(f\"\\nCleaned up test directory: {test_dir}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
